#Details: ### This runs a GSEA analysis using ONLY the MSigDB Hallmark pathways ### As far as I could find, clusterProfiler does not do this out of the box ### clusterProfiler uses the biological process, molecular function, and cellular component options ### Generally speaking, what we want appears to be more niche than the base GSEA options ### So the next option is to find something that incorporates the MSigDB Hallmark option #### What I have found is the CRAN package msigdbr, and I am going to try it and see what happens #### I have checked whether msigdbr can be combined with clusterProfiler, and there is a sketchily documented way to do it

##CRAN Project Vignette: https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html

Options for this analysis (more in depth)

https://yulab-smu.top/biomedical-knowledge-mining-book/semantic-similarity-overview.html

Perks of the package: all of the MSigDB options are present (that I know of) AND multiple species are supported. It is ridiculously basic. The formats make the tibble function easy to use without extra data wrangling.

Set up (from the Vignette)

#this is from the first go round

# Run GSEA on the ranked `gene_list` against the gene sets in `m_t2g`.
# Gene-set size limits, permutation count and the absence of p-value
# adjustment are set explicitly to match the GSEA software settings.
gse <- GSEA(
  gene_list,
  TERM2GENE = m_t2g,
  minGSSize = 15,
  maxGSSize = 500,
  nPermSimple = 1000,
  pvalueCutoff = 0.05,
  pAdjustMethod = "none",
  verbose = TRUE
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(...): There were 2 pathways for which P-values were
## not calculated properly due to unbalanced (positive and negative) gene-level
## statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
## can try to increase the value of the argument nPermSimple (for example set it
## nPermSimple = 10000)
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...

Adjusting Min, Max, and Permutations to be the same as the GSEA software: Min Size 15, Max Size 500, Permutations 1000, and no p-value adjustment method.

# Attach DOSE with library() so a missing package stops the script here
# instead of require() returning FALSE and the failure surfacing later.
library(DOSE)
## Loading required package: DOSE
## DOSE v3.20.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
# Dot plot of 10 categories, split on `.sign` and faceted side by side;
# y-axis labels are shrunk so long gene-set names fit.
dotplot(gse, showCategory = 10, split = ".sign") +
  facet_grid(. ~ .sign) +
  theme(axis.text.y = element_text(size = 4))

# Attach DOSE with library() so a missing package stops the script here
# instead of require() returning FALSE and the failure surfacing later.
library(DOSE)

# Same dot plot as for 10 categories, widened to 20 categories.
dotplot(gse, showCategory = 20, split = ".sign") +
  facet_grid(. ~ .sign) +
  theme(axis.text.y = element_text(size = 4))

# Compute pairwise term similarity (method "JC") and store it back on `gse`,
# then draw the enrichment map for 15 categories.
gse <- pairwise_termsim(
  gse,
  method = "JC",
  semData = NULL,
  showCategory = 200
)
emapplot(gse, showCategory = 15)

# Category-gene network plots for 10 and then 20 categories.
# `categorySize` can be either 'pvalue' or 'geneNum'.
cnetplot(
  gse,
  categorySize = "pvalue",
  foldChange = gene_list,
  showCategory = 10,
  node_label = "category",
  cex_label_category = 0.5
)

cnetplot(
  gse,
  categorySize = "pvalue",
  foldChange = gene_list,
  showCategory = 20,
  node_label = "category",
  cex_label_category = 0.5
)

# Ridge plot of `gse` with a relabelled x-axis and smaller y-axis text.
ridgeplot(gse) +
  labs(x = "enrichment distribution") +
  theme(axis.text.y = element_text(size = 5))
## Picking joint bandwidth of 0.302

# GSEA plots for the first (up to) 20 gene sets in `gse`.
# The row index of a gene set is used both to look up its Description for the
# title and as the value of geneSetID.
max_gsea_plots <- 20L

# Cap at the number of results actually present so that a run with fewer
# than 20 gene sets does not index past the end of the result table.
n_gsea_plots <- min(max_gsea_plots, length(gse$Description))

gsea_plots <- lapply(seq_len(n_gsea_plots), function(i) {
  # `by = "all"` is passed for every plot (the first call previously had the
  # typo `y = "all"`).
  gseaplot(gse, by = "all", title = gse$Description[i], geneSetID = i)
})
names(gsea_plots) <- paste0("P", seq_len(n_gsea_plots))

# Keep P1, P2, ... available as individual objects for any later code that
# refers to them by name.
invisible(list2env(gsea_plots, envir = environment()))

# Plots built inside lapply() are not auto-printed, so print each explicitly.
for (gsea_plot in gsea_plots) {
  print(gsea_plot)
}